The contours from the MCMC seem to be too large. I'm going to take some points from the chain and plot the emulator prediction along with the "truth" at each point to see if they make sense. Additionally, part of my concern is that the errors for the emulator are not right: if I draw a lot of samples from the emulator at a point and compare them against several repopulations ("repops") of the mock at the same point, the scatter should be similar.
In [8]:
from pearce.emulator import OriginalRecipe, ExtraCrispy
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [9]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
from chainconsumer import ChainConsumer
In [10]:
training_dir = '/u/ki/swmclau2/des/PearceLHC_wp_z_normal_errors'
em_method = 'gp'
split_method = 'random'
In [11]:
fixed_params = {'z':0.0}#, 'r':0.18477483}
In [12]:
emu = ExtraCrispy(training_dir, 10, 2, split_method, method = em_method, fixed_params=fixed_params)
In [13]:
emu.scale_bin_centers
Out[13]:
In [14]:
emu.get_param_bounds('sigma_logM')
Out[14]:
In [15]:
emulation_point = [('f_c', 0.233), ('logM0', 12.0), ('sigma_logM', 0.333),
('alpha', 1.053),('logM1', 13.5), ('logMmin', 12.033)]
em_params = dict(emulation_point)
em_params.update(fixed_params)
del em_params['z']
param_names = em_params.keys() # caution: dict key order is arbitrary in Python 2; the chain columns are assumed to match this ordering
In [16]:
cosmo_params = {'simname':'chinchilla', 'Lbox':400.0, 'scale_factors':[1.0]}
cat = cat_dict[cosmo_params['simname']](**cosmo_params)
In [17]:
scale_factor = 1.0
HOD = 'redMagic'
cat.load(scale_factor, HOD=HOD)
In [18]:
rp_bins = list(np.logspace(-1,1.5,19) )
rp_bins.pop(1)
rp_bins = np.array(rp_bins)
rpoints = (rp_bins[1:]+rp_bins[:-1])/2.0
In [19]:
fname = '/u/ki/swmclau2/des/PearceMCMC/100_walkers_5000_steps_chain_large_errors.npy'
In [20]:
chain = np.genfromtxt(fname) # the chain is saved as plain text, despite the .npy extension
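A quick sanity check before using the chain (a sketch added here, not in the original export): with 100 walkers and 5000 steps, the flattened chain should have 500,000 rows, one column per parameter.
In [ ]:
print chain.shape  # expect (500000, n_params)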
In [22]:
sample_idx = 1
#em_point = dict(zip(param_names, chain[sample_idx, :]))
#em_point = dict(zip(emu.get_param_names(), emu.x[sample_idx, :]))
em_point = dict(zip(emu.get_param_names(), emu.x[sample_idx, :]))
In [23]:
emu._ordered_params
Out[23]:
In [24]:
em_point
Out[24]:
In [25]:
colors = sns.color_palette("husl")
mock_color = colors[0]
emulator_color = colors[2]
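The cells that built the "truth" quantities used below (n_repops, true_wp_samples, true_wp, true_wp_errs, and true_nd_samples) are missing from this export. A minimal sketch of how they would be produced, assuming cat.populate, cat.calc_wp, and cat.calc_number_density exist with these signatures (the method names and the repop count are assumptions), and that wp is kept in log space to match the emulator, as the direct subtraction further down implies:
In [ ]:
n_repops = 50  # hypothetical count; the original value was lost with these cells
true_wp_samples, true_nd_samples = [], []
for _ in xrange(n_repops):
    cat.populate(em_params)  # repopulate the mock with the same HOD parameters
    true_wp_samples.append(np.log10(cat.calc_wp(rp_bins)))  # log wp, to match the emulator (an assumption)
    true_nd_samples.append(cat.calc_number_density())
true_wp_samples = np.array(true_wp_samples)
true_nd_samples = np.array(true_nd_samples)
true_wp = true_wp_samples.mean(axis=0)
true_wp_errs = true_wp_samples.std(axis=0)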
In [26]:
print true_wp
In [108]:
true_wp_errs/np.sqrt(n_repops)
Out[108]:
In [123]:
plt.plot(rpoints, (true_wp_samples/true_wp).T, color = mock_color, alpha = 0.7);
plt.ylim([0.95, 1.05])
plt.xscale('log')
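The other half of the check described at the top: draw Gaussian samples from the emulator's stated errors at the same point and see whether their fractional scatter resembles the repop scatter above. A sketch, assuming emu.ycov holds per-bin variances (the covariance cell further down treats it that way):
In [ ]:
pred = emu.emulate_wrt_r(em_point, rpoints)[0]
emu_errs = np.sqrt(emu.ycov)  # assumes ycov holds per-bin variances
emu_samples = pred + np.random.randn(n_repops, len(rpoints)) * emu_errs
plt.plot(rpoints, (emu_samples / pred).T, color=emulator_color, alpha=0.7)
plt.ylim([0.95, 1.05])
plt.xscale('log')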
In [28]:
emu.emulate_wrt_r(em_point, rpoints)
In [27]:
MAP = chain.mean(axis = 0)  # posterior mean, used here as a stand-in for the MAP
em_point = dict(zip(param_names, MAP))
print np.sum( ( (true_wp-emu.emulate_wrt_r(em_point, rpoints))/(true_wp_errs) )**2)
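Visualizing that comparison (a sketch; both curves are assumed to be in the same log-wp space, as the direct subtraction above implies):
In [ ]:
pred = emu.emulate_wrt_r(em_point, rpoints)[0]
plt.errorbar(rpoints, true_wp, yerr=true_wp_errs, color=mock_color, label='mock (repop mean)')
plt.plot(rpoints, pred, color=emulator_color, label='emulator at chain mean')
plt.xscale('log')
plt.legend(loc='best')
plt.show()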
In [110]:
(true_wp-emu.emulate_wrt_r(em_point, rpoints))/true_wp_errs
Out[110]:
In [111]:
true_wp_errs
Out[111]:
In [112]:
print np.sum( ( (true_nd_samples.mean()-cat.calc_analytic_nd(em_point))/(true_nd_samples.std()) )**2)
In [113]:
cat.calc_analytic_nd(em_point)
Out[113]:
In [114]:
true_nd_samples.mean()
Out[114]:
In [26]:
_emu = emu
_cat = cat
# ('global' declarations are unnecessary here: names assigned at the notebook top
# level are already module-level, so lnprior/lnlike below can read them directly)
In [27]:
from itertools import izip

def lnprior(theta, param_names, *args):
    """
    Prior for an MCMC. Default is a flat prior for all parameters, bounded by the
    box the emulator was trained on. Returns negative infinity if theta is outside
    those bounds or contains NaNs.
    :param theta:
        The parameters proposed by the sampler.
    :param param_names:
        The names identifying the values in theta, needed to extract their boundaries.
    :return:
        Either 0 or -np.inf, depending on whether the params are allowed or not.
    """
    for p, t in izip(param_names, theta):
        low, high = _emu.get_param_bounds(p)
        if np.isnan(t) or t < low or t > high:
            return -np.inf
    return 0

def lnlike(theta, param_names, r_bin_centers, y, combined_inv_cov, obs_nd, obs_nd_err, nd_func_name):
    """
    :param theta:
        Proposed parameters.
    :param param_names:
        The names of the parameters in theta.
    :param r_bin_centers:
        The centers of the r bins y is measured in, angular or radial.
    :param y:
        The measured value of the observable to compare to the emulator.
    :param combined_inv_cov:
        The inverse covariance matrix. Explicitly, the inverse of the sum of the
        measurement covariance matrix and the matrix from the emulator. Both are
        independent of the emulator parameters, so they can be precomputed.
    :param obs_nd:
        Observed number density.
    :param obs_nd_err:
        Uncertainty in the observed nd.
    :param nd_func_name:
        Name of the cat method that computes the number density given a dictionary
        of HOD params.
    :return:
        For this diagnostic, a tuple: the wp-only log likelihood, and the log
        likelihood with the number density term included.
    """
    param_dict = dict(izip(param_names, theta))
    y_bar = _emu.emulate_wrt_r(param_dict, r_bin_centers)[0]
    # should chi2 be calculated in log or linear?
    # answer: the user is responsible for taking the log before it comes here.
    delta = y_bar - y
    # despite the name, chi2 here is already -chi^2/2, i.e. a log likelihood
    chi2 = -0.5 * np.dot(delta, np.dot(combined_inv_cov, delta))
    # for this diagnostic, return both the wp-only and the wp+nd log likelihoods
    return chi2, chi2 - 0.5 * ((obs_nd - getattr(_cat, nd_func_name)(param_dict)) / obs_nd_err) ** 2

def lnprob(theta, *args):
    """
    The total likelihood for an MCMC. Mostly a generic wrapper for the functions above.
    :param theta:
        Parameters for the proposal.
    :param args:
        Arguments to pass into the likelihood.
    :return:
        Log likelihood of theta, a float.
    """
    lp = lnprior(theta, *args)
    if not np.isfinite(lp):
        return -np.inf
    # lnlike returns a (wp-only, wp+nd) tuple in this notebook; use the full version
    return lp + lnlike(theta, *args)[1]
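A quick check (sketch) that lnprior behaves as expected: the true point should be allowed, and a point pushed outside the training box should be rejected.
In [ ]:
test_point = np.array([em_params[p] for p in param_names])
print lnprior(test_point, param_names)   # in bounds: expect 0
test_point[0] = 1e6                      # push one parameter far outside the training box
print lnprior(test_point, param_names)   # out of bounds: expect -inf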
In [28]:
from scipy.linalg import inv
cov = np.cov(true_wp_samples.T)
#cov/=n_repops
cov *= 10  # deliberately inflate the mock covariance for this test
combined_inv_cov = inv(np.diag(_emu.ycov) + cov)
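A sketch of a sanity check on the combined covariance before trusting its inverse: it should be symmetric with strictly positive eigenvalues.
In [ ]:
combined_cov = np.diag(_emu.ycov) + cov
print np.allclose(combined_cov, combined_cov.T)    # symmetric?
print np.linalg.eigvalsh(combined_cov).min() > 0   # positive definite?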
In [29]:
plt.imshow(combined_inv_cov)
Out[29]:
In [30]:
print np.sqrt(np.diag(cov))
In [31]:
obs_nd = true_nd_samples.mean()
obs_nd_err = true_nd_samples.std()
nd_func_name = 'calc_analytic_nd'
In [32]:
args = (param_names, rpoints, true_wp_samples.mean(axis = 0), combined_inv_cov, obs_nd, obs_nd_err, nd_func_name)
In [33]:
true_point = np.array([em_params[p] for p in param_names])
true_chi2 = lnlike(true_point, *args)  # a (wp-only, wp+nd) tuple of log likelihoods
print true_chi2
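For scale (a sketch): convert the wp-only term to a chi-squared and compare it against the number of data points. With correctly sized errors, the truth should land near len(rpoints) = 18.
In [ ]:
print -2 * true_chi2[0], len(rpoints)  # chi^2 at the truth vs. the number of wp bins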
In [34]:
chi2s = []
chi2nds = []
for i in xrange(chain.shape[0]/200):
    # thin: every 100th sample from the second half of the chain
    sample_idx = i*100 + chain.shape[0]/2
    chi2, chi2_nd = lnlike(chain[sample_idx, :], *args)
    chi2s.append(chi2)
    chi2nds.append(chi2_nd)
chi2s = np.array(chi2s)*-2    # convert log likelihoods to chi^2
chi2nds = np.array(chi2nds)*-2
In [35]:
print chi2s.shape
In [36]:
idxs = chi2s < 100
plt.hist(chi2s[idxs], bins = 100);
plt.xlim([0, 100])
plt.vlines(true_chi2[0]*-2, 0, 200, lw = 2, label = r'Truth $\chi^2$')  # wp-only log likelihood converted to chi^2
plt.title(r'$\chi^2$ for 2500 random chain samples')
plt.legend(loc='best')
plt.show()
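To judge whether the errors are oversized, a sketch overlaying the expected distribution: if the combined errors were correctly sized, this histogram should roughly track a chi-squared distribution with dof of order the number of wp bins.
In [ ]:
from scipy.stats import chi2 as chi2_dist  # aliased so it doesn't clobber the chi2 variable above
dof = len(rpoints)
x = np.linspace(0, 100, 500)
plt.hist(chi2s[idxs], bins=100, normed=True)
plt.plot(x, chi2_dist.pdf(x, dof), lw=2, label=r'$\chi^2_{%d}$ pdf' % dof)
plt.legend(loc='best')
plt.show()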
In [ ]: